rm(list = ls())
library(countrycode)
library(pscl)
library(foreign)
library(emIRT)
library(readstata13)
library(wnominate)
library(oc)
library(dummies)

setwd("~/Desktop/QJPS Replication")

#This file creates ideal point estimates for countries using a variety of different methods. 
#They are computationally intensive and in some cases will take upwards of 12 hours to run.

#read in the roll-call matrix
data <- read.csv("data/roll_call_matrix.csv")
data <- data[order(data$cowcode),]

##Miller et all democracy data
dem <- read.dta13("data/ccpcnc_v2_small.dta")

#combine democracy data with roll call matrix
data$match <- paste(data$cowcode, data$year)
dem$match <- paste(dem$cowcode, dem$year)
m <- match(data$match, dem$match)
table(is.na(m))

dem$country <- as.character(dem$country)
data$country_name <- dem$country[m]
data$country_name <- as.character(data$country_name)

#get rid of Mexico
data <- data[data$country_name != "Mexico",]

#subset data to only those that had a change occur
#some of the original data are missing because the CCP people haven't coded it yet. For example, the Bahamas only enter the datset in 2002, but became a country in 1973.
data2 <- data[data$evnt == 1,]
data2 <- data2[,11:(ncol(data2)-3)]

#How many missing values are there for each entry
num.missing <- apply(data2, 1, function(x)sum(ifelse(is.na(x), 1, 0)))
prop.missing <- num.missing / ncol(data2)

#Get rid of those entries with more than 90 percent missing
data3 <- data2[prop.missing < .9,]

#make the matrix a roll call object
mat <- rollcall(data3, yea = 1, nay = 0, legis.names = rownames(data3), vote.names = colnames(data3))

#scale the roll call matrix
#These take several hours to complete.

####################################################################
#Bayesian Model - This is the main method we use in the paper######
####################################################################

#1 dimensional model
set.seed(4227492)
idealpoints <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 1, store.item = T, normalize = T)

#extract the means of the posterior estimates for each observation
d1 <- as.data.frame(apply(idealpoints$x, 2, function(x) quantile(x, .5, na.rm =T)))
lower <- as.data.frame(apply(idealpoints$x, 2, function(x) quantile(x, .05, na.rm =T)))
upper <- apply(idealpoints$x, 2, function(x) quantile(x, .95, na.rm =T))

names <- as.character(data$country_name[data$evnt == 1])[prop.missing < .9]
years <- data$year[data$evnt == 1][prop.missing < .9]
cow <- data$cowcode[data$evnt == 1][prop.missing < .9]
nameyear <- paste(names, years, sep = "-")

points <- data.frame(d1, lower, upper, names, cow, years)
names(points) <- c("median", "upper", "lower", "name", "cown", "year")
points$names2 <- countrycode(points$cow, "cown", "iso2c")

write.csv(points, "all_countries_ideal.csv")

######################
#WNOMINATE METHOD
######################

#scale that beautiful roll call matrix
set.seed(4227492)
idealpoints <- wnominate(mat, dims = 1, polarity = 1, lop = 0.05)

points <- as.data.frame(idealpoints$legislators$coord1D)

names <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
ccode <- as.character(data$cowcode[data$evnt == 1][prop.missing < .9])
year <- as.character(data$year[data$evnt == 1][prop.missing < .9])

points$name <- names
points$ccode <- ccode
points$year <- year
names(points) <- c("coord1D", "Name", "cowcode", "year")

write.csv(points, "all_countries_WNOMINATE.csv")
          
          
###########################
#OPTIMAL CLASSIFICATION###
###########################
          
idealpoints <- oc(mat, dims = 1, polarity = c(1))
ocpoints <- as.data.frame(idealpoints$legislators)
          
names <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
ccode <- as.character(data$cowcode[data$evnt == 1][prop.missing < .9])
year <- as.character(data$year[data$evnt == 1][prop.missing < .9])
          
ocpoints$name <- names
ocpoints$ccode <- ccode
ocpoints$year <- year
      
write.csv(ocpoints, "all_countries_oc2.csv")


################################################
### Hierarchical model within countries ########
###############################################

#VOTE DATA
#y = matrix of observed votes
y <- NULL
for (j in 1:nrow(data3)){
  y <-c(y, as.numeric(data3[j,]))
  print(j)
}

y <- as.matrix(y)
y[y == 0] <- -1
y[is.na(y)] <- 0

#j = integer matrix of indexes of the bill/item j[l] linked to each observed vote
j <- NULL
for (k in 1:nrow(data3)){
  j <- c(j, seq(1:ncol(data3))-1)
  print(k)
}

j <- as.matrix(j)


#get the country names for all observations and then get the unique list of country names
names <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
years <- as.character(data$year[data$evnt == 1][prop.missing < .9])

uniq <- unique(names)

#g = integer matrix of indexes of the group membership g[i[l]] linked to each ideal point
a <- NULL
for (i in 1:length(uniq)){
  a <- c(a, rep(i-1, length(names[names == uniq[i]])))
}
g <- as.matrix(a)

#z1 is a 1 (intercept) for each group
z1 <- rep(1, length(g))


#z2 is an index within each group (count within country)
uniq <- unique(names)
a <- NULL
for (i in 1:length(uniq)){
  a <- c(a, seq(1:length(names[names == uniq[i]])) -1 )
}
z2 <- a
z3 <- a^2

#link the two together
z <- as.matrix(cbind(z1, z2, z3))

#integer matrix of indexes of the ideal point i[l] linked to each observed vote
i <- NULL
for (k in 1:nrow(data3)){
  i <- c(i, rep(k-1, ncol(data3)))
  print(k)
}
i <- as.matrix(i)

###########

ND <- 3

NG <- length(uniq)

NI <- nrow(data3)

NJ <- ncol(data3)

NL <- length(y)


hier.const <- list(y=y, i=i, j=j, g=g, z=z, ND = ND, NG = NG, NI = NI, NJ = NJ, NL = NL)

#####STARTS

alpha <- as.matrix(rep(0, ncol(data3)))

beta <- as.matrix(rep(0, ncol(data3)))

gamma <- matrix(1, nrow = nrow(data3), ncol = 3)

eta <- as.matrix(rep(0, nrow(data3)))

sigma <- as.matrix(rep(0, length(uniq)))

starts <- list(alpha = alpha, beta = beta, gamma = gamma, eta = eta, sigma = sigma)

#####PRIORS

beta.mu <- as.matrix(c(0,0))

beta.sigma <- as.matrix(cbind(c(25, 0), c(0, 25)))

gamma.mu <- as.matrix(rep(0, 3))

gamma.sigma <- matrix(0, nrow = 3, ncol = 3)

diag(gamma.sigma) <- 25

sigma.s <- as.matrix(1e-08)

sigma.v <- as.matrix(1e+08)

priors <- list(beta.mu = beta.mu, beta.sigma = beta.sigma, gamma.mu = gamma.mu, gamma.sigma = gamma.sigma, sigma.s = sigma.s, sigma.v = sigma.v)

#RUN THE MODEL
set.seed(4227492)
lout <- hierIRT(.data = hier.const, .starts = starts, .priors = priors, 
                .control = list(threads = 1, verbose = TRUE))

means <- lout$means$x_implied
means <- as.data.frame(means)

names <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
ccode <- as.character(data$cowcode[data$evnt == 1][prop.missing < .9])
year <- as.character(data$year[data$evnt == 1][prop.missing < .9])

means$name <- names
means$ccode <- ccode
means$year <- year

#SAVE THE DATA
write.csv(means, "all_countries_hierarchical.csv")


##################################################
### Heirarchical Model by Colonial Origin ########
##################################################
#get the colonial origin names for all observations and then get the unique list of country names
qog <- read.csv("data/qog_std_ts_jan17.csv")
table(qog$ht_colonial)
qog2 <- qog[!is.na(qog$ht_colonial),]

m <- match(data$cowcode, qog2$ccodecow)
table(is.na(m))
data$colony_origin <- qog2$ht_colonial[m]
data$colony_origin[is.na(data$colony_origin)] <- 0
data <- data[order(data$colony_origin),]

data2 <- data[data$evnt == 1,]
data2 <- data2[,11:(ncol(data2)-3)]
num.missing <- apply(data2, 1, function(x)sum(ifelse(is.na(x), 1, 0)))
prop.missing <- num.missing / ncol(data2)

#Get rid of those entries with more than 90 percent missing
data3 <- data2[prop.missing < .9,]

#VOTE DATA
#y = matrix of observed votes
y <- NULL
for (j in 1:nrow(data3)){
  y <-c(y, as.numeric(data3[j,]))
  print(j)
}

y <- as.matrix(y)
y[y == 0] <- -1
y[is.na(y)] <- 0


#j = integer matrix of indexes of the bill/item j[l] linked to each observed vote
j <- NULL
for (k in 1:nrow(data3)){
  j <- c(j, seq(1:ncol(data3))-1)
  print(k)
}

j <- as.matrix(j)

names <- as.character(data$colony_origin[data$evnt == 1][prop.missing < .9])
years <- as.character(data$year[data$evnt == 1][prop.missing < .9])

uniq <- unique(names)

#g = integer matrix of indexes of the group membership g[i[l]] linked to each ideal point
a <- NULL
for (i in 1:length(uniq)){
  a <- c(a, rep(i-1, length(names[names == uniq[i]])))
}
g <- as.matrix(a)

#z1 is a 1 (intercept) for all observations
z1 <- as.matrix(rep(1, length(g)))

#z2 is an index within each group (count within country)
countries <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
uniq.cntries <- unique(countries)
a <- NULL
for (i in 1:length(uniq.cntries)){
  a <- c(a, rep(i-1, length(countries[countries == uniq.cntries[i]])))
}
a <- dummy(a)
a <- a[,2:ncol(a)]
z2 <- as.matrix(a)

z <- as.matrix(cbind(z1, z2))

#integer matrix of indexes of the ideal point i[l] linked to each observed vote
i <- NULL
for (k in 1:nrow(data3)){
  i <- c(i, rep(k-1, ncol(data3)))
  print(k)
}
i <- as.matrix(i)

###########
###########
ND <- 193

NG <- length(uniq)

NI <- nrow(data3)

NJ <- ncol(data3)

NL <- length(y)

hier.const <- list(y=y, i=i, j=j, g=g, z=z, ND = ND, NG = NG, NI = NI, NJ = NJ, NL = NL)

#####STARTS

alpha <- as.matrix(rep(0, ncol(data3)))

beta <- as.matrix(rep(0, ncol(data3)))

gamma <- matrix(1, nrow = nrow(data3), ncol = 193)

eta <- as.matrix(rep(0, nrow(data3)))

sigma <- as.matrix(rep(0, length(uniq)))

starts <- list(alpha = alpha, beta = beta, gamma = gamma, eta = eta, sigma = sigma)

#####PRIORS

beta.mu <- as.matrix(c(0,0))

beta.sigma <- as.matrix(cbind(c(25, 0), c(0, 25)))

gamma.mu <- as.matrix(rep(0, 193))

gamma.sigma <- matrix(0, nrow = 193, ncol = 193)

diag(gamma.sigma) <- 25

sigma.s <- as.matrix(1e-08)

sigma.v <- as.matrix(1e+08)

priors <- list(beta.mu = beta.mu, beta.sigma = beta.sigma, gamma.mu = gamma.mu, gamma.sigma = gamma.sigma, sigma.s = sigma.s, sigma.v = sigma.v)

#RUN THE MODEL
set.seed(4227492)
lout <- hierIRT(.data = hier.const, .starts = starts, .priors = priors, 
                .control = list(threads = 1, verbose = TRUE))

means <- lout$means$x_implied
means <- as.data.frame(means)

names <- as.character(data$country_name[data$evnt == 1][prop.missing < .9])
ccode <- as.character(data$cowcode[data$evnt == 1][prop.missing < .9])
year <- as.character(data$year[data$evnt == 1][prop.missing < .9])

means$name <- names
means$ccode <- ccode
means$year <- year

#SAVE THE DATA
write.csv(means, "all_countries_hierarchical_colonial.csv")
